#!/bin/bash
# Train the ~500M-parameter model (Qwen2.5-0.5B LLM + AIMv2 vision encoder)
# in two sequential stages:
#   1) pretrain the vision->LLM connector (projector) on captioning data,
#   2) full supervised fine-tuning starting from the stage-1 connector.
#
# NOTE: the shebang must be the very first line of the file; when it appears
# after a comment the kernel ignores it and the script runs under the
# invoking shell instead of bash.

# Abort on the first failure: stage 2 loads the checkpoint written by
# stage 1, so continuing after a stage-1 failure would train from a
# missing/broken connector.
set -euo pipefail

# Shared configuration for both stages.
readonly PROMPT_VERSION=qwen
readonly LLM_MODEL_PATH="checkpoints/Qwen2.5-0.5B-Instruct"
readonly VE_MODEL_PATH="checkpoints/aimv2-large-patch14-224"
readonly EXP_NAME="qwen2-500m-aimv2"
# NOTE(review): IMG_SIZE is defined but never passed to train.py below —
# confirm whether it is still needed or should be wired into the command.
readonly IMG_SIZE=224
readonly PROJ_TYPE=mlp2x_gelu
# Stage 1: connector (projector) pretraining.
# Trains only the vision->LLM connector (--tune_conn_ve_llm True); the LLM
# and vision encoder stay at their pretrained weights. Output checkpoint is
# consumed by stage 2 via --pretrain_conn_ve_llm_path.
# Effective batch size: 3 per device x 18 accumulation steps (x num GPUs).
deepspeed train.py \
    --deepspeed ./trains/zero2.json \
    --llm_model_path "$LLM_MODEL_PATH" \
    --ve_model_path "$VE_MODEL_PATH" \
    --version "$PROMPT_VERSION" \
    --data_path ./data/sft/share-captioner_coco_lcs_sam_1246k_1107.json \
    ./data/sft/wanjuan_ocr_b3.json \
    ./data/sft/llava_recap_558k.json \
    --image_folder ./data/images_all \
    --conn_ve_llm_type "$PROJ_TYPE" \
    --tune_conn_ve_llm True \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --output_dir "./checkpoints/namo-${EXP_NAME}-conn-ve-${PROJ_TYPE}" \
    --num_train_epochs 1 \
    --per_device_train_batch_size 3 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 18 \
    --eval_strategy "no" \
    --save_strategy "steps" \
    --save_steps 500 \
    --save_total_limit 1 \
    --learning_rate 3e-5 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 False \
    --fp16 False \
    --bf16 True \
    --model_max_length 1900 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True

# Stage 2: full supervised fine-tuning.
# Unfreezes the vision encoder (--unfreeze_ve True) with its own lower
# learning rate (--ve_lr), and initializes the connector from the stage-1
# checkpoint (--pretrain_conn_ve_llm_path). Adds the OCR-heavy SFT datasets.
# NOTE(review): this stage uses zero2_npu.json while stage 1 uses
# zero2.json — confirm the NPU-specific DeepSpeed config is intentional
# for the hardware this script targets.
deepspeed train.py \
    --deepspeed ./trains/zero2_npu.json \
    --ve_lr 0.2e-5 \
    --conn_ve_llm_lr 2e-5 \
    --llm_model_path "$LLM_MODEL_PATH" \
    --ve_model_path "$VE_MODEL_PATH" \
    --version "$PROMPT_VERSION" \
    --data_path ./data/sft/share-captioner_coco_lcs_sam_1246k_1107.json \
    ./data/sft/llava_recap_558k.json \
    ./data/sft/textmonkey_pretrain.json \
    ./data/sft/wanjuan_ocr_b3.json \
    ./data/sft/inhouse_crop_b1.json \
    ./data/sft/mtwi_ocr_20k_ocr.json \
    ./data/sft/icdar2019_lsvt_50k_ocr.json \
    ./data/sft/mmc.json \
    ./data/sft/icdar_mlt_ocr.json \
    ./data/sft/autoposter_76k_ocr.json \
    --image_folder ./data/images_all \
    --unfreeze_ve True \
    --conn_ve_llm_type "$PROJ_TYPE" \
    --pretrain_conn_ve_llm_path "./checkpoints/namo-${EXP_NAME}-conn-ve-${PROJ_TYPE}/" \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --image_aspect_ratio pad \
    --group_by_modality_length True \
    --tf32 False \
    --fp16 False \
    --bf16 True \
    --output_dir "./checkpoints/namo-${EXP_NAME}-${PROJ_TYPE}" \
    --num_train_epochs 1 \
    --per_device_train_batch_size 5 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 8 \
    --eval_strategy "no" \
    --save_strategy "steps" \
    --save_steps 500 \
    --save_total_limit 1 \
    --learning_rate 4e-5 \
    --weight_decay 0. \
    --warmup_steps 360 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True
